summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Fernando Sahmkow <fsahmkow27@gmail.com> 2023-12-30 03:36:24 +0100
committer Liam <byteslice@airmail.cc> 2024-01-19 03:12:30 +0100
commit 0adc09e0afcde345a5303efd73b3b7737245a7d9 (patch)
tree aa16b0c2434d5faaf78fdc780b3123a9cce73c6f
parent GPU SMMU: Expand to 34 bits (diff)
download yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.tar
yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.tar.gz
yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.tar.bz2
yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.tar.lz
yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.tar.xz
yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.tar.zst
yuzu-0adc09e0afcde345a5303efd73b3b7737245a7d9.zip
-rw-r--r-- src/core/CMakeLists.txt 2
-rw-r--r-- src/core/device_memory_manager.inc 8
-rw-r--r-- src/core/hle/service/nvdrv/core/container.cpp 62
-rw-r--r-- src/core/hle/service/nvdrv/core/container.h 4
-rw-r--r-- src/core/hle/service/nvdrv/core/heap_mapper.cpp 172
-rw-r--r-- src/core/hle/service/nvdrv/core/heap_mapper.h 48
-rw-r--r-- src/core/hle/service/nvdrv/core/nvmap.cpp 56
-rw-r--r-- src/core/hle/service/nvdrv/core/nvmap.h 2
-rw-r--r-- src/video_core/gpu.cpp 2
9 files changed, 329 insertions, 27 deletions
diff --git a/src/core/CMakeLists.txt b/src/core/CMakeLists.txt
index ca54eb6c6..0f713ead1 100644
--- a/src/core/CMakeLists.txt
+++ b/src/core/CMakeLists.txt
@@ -611,6 +611,8 @@ add_library(core STATIC
hle/service/ns/pdm_qry.h
hle/service/nvdrv/core/container.cpp
hle/service/nvdrv/core/container.h
+ hle/service/nvdrv/core/heap_mapper.cpp
+ hle/service/nvdrv/core/heap_mapper.h
hle/service/nvdrv/core/nvmap.cpp
hle/service/nvdrv/core/nvmap.h
hle/service/nvdrv/core/syncpoint_manager.cpp
diff --git a/src/core/device_memory_manager.inc b/src/core/device_memory_manager.inc
index b3a5f3d8b..138eb5017 100644
--- a/src/core/device_memory_manager.inc
+++ b/src/core/device_memory_manager.inc
@@ -20,10 +20,10 @@ namespace Core {
namespace {
-class PhysicalAddressContainer {
+class MultiAddressContainer {
public:
- PhysicalAddressContainer() = default;
- ~PhysicalAddressContainer() = default;
+ MultiAddressContainer() = default;
+ ~MultiAddressContainer() = default;
void GatherValues(u32 start_entry, Common::ScratchBuffer<u32>& buffer) {
buffer.resize(8);
@@ -145,7 +145,7 @@ struct DeviceMemoryManagerAllocator {
std::conditional_t<supports_pinning, Common::FlatAllocator<DAddr, 0, pin_bits>, EmptyAllocator>
pin_allocator;
Common::FlatAllocator<DAddr, 0, device_virtual_bits> main_allocator;
- PhysicalAddressContainer multi_dev_address;
+ MultiAddressContainer multi_dev_address;
/// Returns true when vaddr -> vaddr+size is fully contained in the buffer
template <bool pin_area>
diff --git a/src/core/hle/service/nvdrv/core/container.cpp b/src/core/hle/service/nvdrv/core/container.cpp
index e12ce05c1..ba7eb9e24 100644
--- a/src/core/hle/service/nvdrv/core/container.cpp
+++ b/src/core/hle/service/nvdrv/core/container.cpp
@@ -8,6 +8,7 @@
#include "core/hle/kernel/k_process.h"
#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/hle/service/nvdrv/core/syncpoint_manager.h"
#include "core/memory.h"
@@ -36,6 +37,14 @@ Container::~Container() = default;
size_t Container::OpenSession(Kernel::KProcess* process) {
std::scoped_lock lk(impl->session_guard);
+ for (auto& session : impl->sessions) {
+ if (!session.is_active) {
+ continue;
+ }
+ if (session.process == process) {
+ return session.id;
+ }
+ }
size_t new_id{};
auto* memory_interface = &process->GetMemory();
auto& smmu = impl->host1x.MemoryManager();
@@ -48,16 +57,65 @@ size_t Container::OpenSession(Kernel::KProcess* process) {
impl->sessions.emplace_back(new_id, process, smmu_id);
new_id = impl->new_ids++;
}
- LOG_CRITICAL(Debug, "Created Session {}", new_id);
+ auto& session = impl->sessions[new_id];
+ session.is_active = true;
+ // Optimization
+ if (process->IsApplication()) {
+ auto& page_table = process->GetPageTable().GetBasePageTable();
+ auto heap_start = page_table.GetHeapRegionStart();
+
+ Kernel::KProcessAddress cur_addr = heap_start;
+ size_t region_size = 0;
+ VAddr region_start = 0;
+ while (true) {
+ Kernel::KMemoryInfo mem_info{};
+ Kernel::Svc::PageInfo page_info{};
+ R_ASSERT(page_table.QueryInfo(std::addressof(mem_info), std::addressof(page_info),
+ cur_addr));
+ auto svc_mem_info = mem_info.GetSvcMemoryInfo();
+
+ // check if this memory block is heap
+ if (svc_mem_info.state == Kernel::Svc::MemoryState::Normal) {
+ if (svc_mem_info.size > region_size) {
+ region_size = svc_mem_info.size;
+ region_start = svc_mem_info.base_address;
+ }
+ }
+
+ // Check if we're done.
+ const uintptr_t next_address = svc_mem_info.base_address + svc_mem_info.size;
+ if (next_address <= GetInteger(cur_addr)) {
+ break;
+ }
+
+ cur_addr = next_address;
+ }
+ session.has_preallocated_area = false;
+ auto start_region = (region_size >> 15) >= 1024 ? smmu.Allocate(region_size) : 0;
+ if (start_region != 0) {
+ session.mapper = std::make_unique<HeapMapper>(region_start, start_region, region_size,
+ smmu_id, impl->host1x);
+ session.has_preallocated_area = true;
+ LOG_CRITICAL(Debug, "Preallocation created!");
+ }
+ }
return new_id;
}
+// Closes the session stored at index `id` while holding `session_guard`: releases the
+// session's preallocated heap area (if it has one), marks the session inactive, unregisters
+// its SMMU process id and pushes `id` onto the front of `id_pool`.
void Container::CloseSession(size_t id) {
std::scoped_lock lk(impl->session_guard);
+ auto& session = impl->sessions[id];
auto& smmu = impl->host1x.MemoryManager();
+ if (session.has_preallocated_area) {
+ // Read the region bounds first: they are only reachable through the mapper, which is
+ // destroyed on the next line (its destructor unmaps the whole region) before the
+ // device address range is handed back to the SMMU allocator.
+ const DAddr region_start = session.mapper->GetRegionStart();
+ const size_t region_size = session.mapper->GetRegionSize();
+ session.mapper.reset();
+ smmu.Free(region_start, region_size);
+ session.has_preallocated_area = false;
+ }
+ // OpenSession skips entries whose is_active flag is false when looking for an existing
+ // session of the same process.
+ session.is_active = false;
smmu.UnregisterProcess(impl->sessions[id].smmu_id);
impl->id_pool.emplace_front(id);
- LOG_CRITICAL(Debug, "Closed Session {}", id);
}
Session* Container::GetSession(size_t id) {
diff --git a/src/core/hle/service/nvdrv/core/container.h b/src/core/hle/service/nvdrv/core/container.h
index a1fd20199..86705cbc8 100644
--- a/src/core/hle/service/nvdrv/core/container.h
+++ b/src/core/hle/service/nvdrv/core/container.h
@@ -20,6 +20,7 @@ class Host1x;
namespace Service::Nvidia::NvCore {
+class HeapMapper;
class NvMap;
class SyncpointManager;
@@ -29,6 +30,9 @@ struct Session {
size_t id;
Kernel::KProcess* process;
size_t smmu_id;
+ bool has_preallocated_area{};
+ std::unique_ptr<HeapMapper> mapper{};
+ bool is_active{};
};
class Container {
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.cpp b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
new file mode 100644
index 000000000..59d993bc6
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.cpp
@@ -0,0 +1,172 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <mutex>
+
+#include <boost/container/small_vector.hpp>
+#define BOOST_NO_MT
+#include <boost/pool/detail/mutex.hpp>
+#undef BOOST_NO_MT
+#include <boost/icl/interval.hpp>
+#include <boost/icl/interval_base_set.hpp>
+#include <boost/icl/interval_set.hpp>
+#include <boost/icl/split_interval_map.hpp>
+#include <boost/pool/pool.hpp>
+#include <boost/pool/pool_alloc.hpp>
+#include <boost/pool/poolfwd.hpp>
+
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
+#include "video_core/host1x/host1x.h"
+
+// Declares (without defining) a partial specialization of boost::fast_pool_allocator for the
+// new/delete user allocator, details::pool::null_mutex, and the trailing arguments 4096 and 0.
+// NOTE(review): per the Boost.Pool documentation the last two parameters are NextSize and
+// MaxSize, and null_mutex presumably disables internal locking — confirm against the Boost
+// version in use.
+namespace boost {
+template <typename T>
+class fast_pool_allocator<T, default_user_allocator_new_delete, details::pool::null_mutex, 4096, 0>;
+}
+
+namespace Service::Nvidia::NvCore {
+
+// Interval vocabulary used by HeapMapper, keyed on DAddr.
+// NOTE(review): IntervalCompare, IntervalInstance and IntervalAllocator are not referenced by
+// any code visible in this file; IntervalSet is declared with Boost.ICL's default template
+// arguments rather than with these aliases — confirm whether that is intended.
+using IntervalCompare = std::less<DAddr>;
+using IntervalInstance = boost::icl::interval_type_default<DAddr, std::less>;
+using IntervalAllocator = boost::fast_pool_allocator<DAddr>;
+using IntervalSet = boost::icl::interval_set<DAddr>;
+using IntervalType = typename IntervalSet::interval_type;
+
+/// In-place combiner for Boost.ICL maps: adds `added` onto `current` and clamps the result so
+/// that it never falls below the identity element of `Type`.
+template <typename Type>
+struct counter_add_functor : public boost::icl::identity_based_inplace_combine<Type> {
+    using type = counter_add_functor<Type>;
+    using base_type = boost::icl::identity_based_inplace_combine<Type>;
+
+    /// Accumulates `added` into `current`, saturating at the identity element.
+    void operator()(Type& current, const Type& added) const {
+        current += added;
+        const Type lowest_allowed = base_type::identity_element();
+        if (current < lowest_allowed) {
+            current = lowest_allowed;
+        }
+    }
+
+    /// No-op.
+    static void version(Type&) {}
+};
+
+// OverlapCounter associates each DAddr interval with an int counter; HeapMapper::Map adds 1
+// to a range and HeapMapper::Unmap adds -1 to it.
+// NOTE(review): OverlapCounter is declared with split_interval_map's default template
+// arguments, so OverlapCombine and OverlapSection are not wired into it and are not
+// referenced elsewhere in the visible code — confirm whether that is intended.
+using OverlapCombine = counter_add_functor<int>;
+using OverlapSection = boost::icl::inter_section<int>;
+using OverlapCounter = boost::icl::split_interval_map<DAddr, int>;
+
+/// Private state of HeapMapper: the scratch interval set, the overlap counter, the device
+/// memory manager and the mutex that HeapMapper::Map / HeapMapper::Unmap hold while working.
+struct HeapMapper::HeapMapperInternal {
+    /// Binds the state to the device memory manager owned by `host1x`.
+    explicit HeapMapperInternal(Tegra::Host1x::Host1x& host1x)
+        : device_memory{host1x.MemoryManager()} {}
+    ~HeapMapperInternal() = default;
+
+    /// Visits the entries of `current_range` located between `lower_bound` and `upper_bound`
+    /// of the interval [cpu_addr, cpu_addr + size).
+    ///
+    /// For each entry, `func(begin, end, count)` is called with the entry's bounds clamped to
+    /// the searched interval and with the counter stored for that entry.
+    template <typename Func>
+    void ForEachInOverlapCounter(OverlapCounter& current_range, VAddr cpu_addr, u64 size,
+                                 Func&& func) {
+        const DAddr start_address = cpu_addr;
+        const DAddr end_address = start_address + size;
+        const IntervalType search_interval{start_address, end_address};
+        auto it = current_range.lower_bound(search_interval);
+        if (it == current_range.end()) {
+            return;
+        }
+        auto end_it = current_range.upper_bound(search_interval);
+        for (; it != end_it; ++it) {
+            auto& inter = it->first;
+            DAddr inter_addr_end = inter.upper();
+            DAddr inter_addr = inter.lower();
+            // Clamp the reported bounds to the range that was asked for.
+            if (inter_addr_end > end_address) {
+                inter_addr_end = end_address;
+            }
+            if (inter_addr < start_address) {
+                inter_addr = start_address;
+            }
+            func(inter_addr, inter_addr_end, it->second);
+        }
+    }
+
+    /// Adds `subtract_value` to the counter of `search_interval` and then erases every entry
+    /// in that interval whose counter is zero or negative.
+    void RemoveEachInOverlapCounter(OverlapCounter& current_range,
+                                    const IntervalType& search_interval, int subtract_value) {
+        bool any_removals = false;
+        current_range.add(std::make_pair(search_interval, subtract_value));
+        do {
+            any_removals = false;
+            auto it = current_range.lower_bound(search_interval);
+            if (it == current_range.end()) {
+                return;
+            }
+            auto end_it = current_range.upper_bound(search_interval);
+            for (; it != end_it; ++it) {
+                if (it->second <= 0) {
+                    // The scan is restarted from fresh bounds after every erase instead of
+                    // continuing with the iterator that was just erased.
+                    any_removals = true;
+                    current_range.erase(it);
+                    break;
+                }
+            }
+        } while (any_removals);
+    }
+
+    IntervalSet base_set;                              ///< Scratch set, cleared around each use.
+    OverlapCounter mapping_overlaps;                   ///< Per-interval count of active maps.
+    Tegra::MaxwellDeviceMemoryManager& device_memory;  ///< Obtained from host1x.MemoryManager().
+    std::mutex guard;                                  ///< Locked by HeapMapper::Map and Unmap.
+};
+
+/// Stores the guest virtual base, the device base, the region size and the SMMU id, and
+/// creates the internal bookkeeping state from `host1x`.
+HeapMapper::HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
+                       Tegra::Host1x::Host1x& host1x)
+    : m_vaddress{start_vaddress}, m_daddress{start_daddress}, m_size{size}, m_smmu_id{smmu_id},
+      m_internal{std::make_unique<HeapMapperInternal>(host1x)} {}
+
+/// Unmaps the complete device range [m_daddress, m_daddress + m_size) on destruction.
+HeapMapper::~HeapMapper() {
+    auto& smmu = m_internal->device_memory;
+    smmu.Unmap(m_daddress, m_size);
+}
+
+/// Maps the guest range [start, start + size) into the preallocated device region.
+///
+/// Only the parts of the range that the overlap counter does not report yet are handed to the
+/// device memory manager; afterwards the counter of the whole range is increased by one.
+///
+/// @returns the device address that corresponds to `start`.
+DAddr HeapMapper::Map(VAddr start, size_t size) {
+    std::scoped_lock lk(m_internal->guard);
+    auto& state = *m_internal;
+    auto& pending = state.base_set;
+
+    // Begin with the full request and carve out every piece that is already tracked.
+    pending.clear();
+    const IntervalType requested{start, start + size};
+    pending.insert(requested);
+    state.ForEachInOverlapCounter(
+        state.mapping_overlaps, start, size,
+        [&pending](VAddr tracked_begin, VAddr tracked_end, int) {
+            const IntervalType tracked{tracked_begin, tracked_end};
+            pending.subtract(tracked);
+        });
+
+    // What remains has no mapping yet, so it is mapped now.
+    for (const auto& gap : pending) {
+        const VAddr gap_end = gap.upper();
+        const VAddr gap_begin = gap.lower();
+        const size_t offset = gap_begin - m_vaddress;
+        const size_t gap_size = gap_end - gap_begin;
+        state.device_memory.Map(m_daddress + offset, m_vaddress + offset, gap_size, m_smmu_id);
+    }
+
+    state.mapping_overlaps += std::make_pair(requested, 1);
+    pending.clear();
+    return m_daddress + (start - m_vaddress);
+}
+
+/// Releases one mapping of the guest range [start, start + size).
+///
+/// Pieces of the range whose overlap counter is at most one are unmapped from the device
+/// memory manager; the counter of the whole range is then decreased by one and entries that
+/// reached zero or below are dropped.
+void HeapMapper::Unmap(VAddr start, size_t size) {
+    std::scoped_lock lk(m_internal->guard);
+    auto& state = *m_internal;
+    auto& releasable = state.base_set;
+
+    // Collect the pieces that no other mapping keeps alive.
+    releasable.clear();
+    state.ForEachInOverlapCounter(
+        state.mapping_overlaps, start, size,
+        [&releasable](VAddr piece_begin, VAddr piece_end, int use_count) {
+            if (use_count <= 1) {
+                const IntervalType piece{piece_begin, piece_end};
+                releasable.insert(piece);
+            }
+        });
+
+    for (const auto& range : releasable) {
+        const VAddr range_end = range.upper();
+        const VAddr range_begin = range.lower();
+        const size_t offset = range_begin - m_vaddress;
+        const size_t range_size = range_end - range_begin;
+        state.device_memory.Unmap(m_daddress + offset, range_size);
+    }
+
+    const IntervalType to_remove{start, start + size};
+    state.RemoveEachInOverlapCounter(state.mapping_overlaps, to_remove, -1);
+    releasable.clear();
+}
+
+} // namespace Service::Nvidia::NvCore \ No newline at end of file
diff --git a/src/core/hle/service/nvdrv/core/heap_mapper.h b/src/core/hle/service/nvdrv/core/heap_mapper.h
new file mode 100644
index 000000000..8b23638b8
--- /dev/null
+++ b/src/core/hle/service/nvdrv/core/heap_mapper.h
@@ -0,0 +1,48 @@
+// SPDX-FileCopyrightText: 2023 yuzu Emulator Project
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#pragma once
+
+#include <memory>
+
+#include "common/common_types.h"
+
+namespace Tegra::Host1x {
+class Host1x;
+} // namespace Tegra::Host1x
+
+namespace Service::Nvidia::NvCore {
+
+/// Maps sub-ranges of one guest virtual region onto one preallocated device address region.
+///
+/// The region begins at `start_vaddress` on the guest side and at `start_daddress` on the
+/// device side and covers `size` bytes; a guest address `v` inside it corresponds to the
+/// device address `start_daddress + (v - start_vaddress)`.
+class HeapMapper {
+public:
+    /// @param start_vaddress Guest virtual address where the managed region begins.
+    /// @param start_daddress Device address where the managed region begins.
+    /// @param size           Size of the managed region in bytes.
+    /// @param smmu_id        Id passed to the device memory manager on every Map call.
+    /// @param host1x         Provides the device memory manager used for mapping.
+    HeapMapper(VAddr start_vaddress, DAddr start_daddress, size_t size, size_t smmu_id,
+               Tegra::Host1x::Host1x& host1x);
+    ~HeapMapper();
+
+    /// Returns true when [start, start + size) lies entirely inside the managed guest region.
+    ///
+    /// The check deliberately avoids computing `start + size`: that sum can wrap around for a
+    /// very large `size` and would then wrongly report an out-of-range request as in bounds.
+    bool IsInBounds(VAddr start, size_t size) const {
+        if (start < m_vaddress || size > m_size) {
+            return false;
+        }
+        return (start - m_vaddress) <= (m_size - size);
+    }
+
+    /// Maps the guest range [start, start + size) and returns the matching device address.
+    DAddr Map(VAddr start, size_t size);
+
+    /// Releases one mapping of the guest range [start, start + size).
+    void Unmap(VAddr start, size_t size);
+
+    /// Device address at which the managed region begins.
+    DAddr GetRegionStart() const {
+        return m_daddress;
+    }
+
+    /// Size of the managed region in bytes.
+    size_t GetRegionSize() const {
+        return m_size;
+    }
+
+private:
+    struct HeapMapperInternal;
+    VAddr m_vaddress;
+    DAddr m_daddress;
+    size_t m_size;
+    size_t m_smmu_id;
+    std::unique_ptr<HeapMapperInternal> m_internal;
+};
+
+} // namespace Service::Nvidia::NvCore \ No newline at end of file
diff --git a/src/core/hle/service/nvdrv/core/nvmap.cpp b/src/core/hle/service/nvdrv/core/nvmap.cpp
index 0b2ddd980..023c070d9 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.cpp
+++ b/src/core/hle/service/nvdrv/core/nvmap.cpp
@@ -8,10 +8,12 @@
#include "common/assert.h"
#include "common/logging/log.h"
#include "core/hle/service/nvdrv/core/container.h"
+#include "core/hle/service/nvdrv/core/heap_mapper.h"
#include "core/hle/service/nvdrv/core/nvmap.h"
#include "core/memory.h"
#include "video_core/host1x/host1x.h"
+
using Core::Memory::YUZU_PAGESIZE;
namespace Service::Nvidia::NvCore {
@@ -90,10 +92,19 @@ void NvMap::UnmapHandle(Handle& handle_description) {
}
// Free and unmap the handle from the SMMU
- auto& smmu = host1x.MemoryManager();
- smmu.Unmap(handle_description.d_address, handle_description.aligned_size);
- smmu.Free(handle_description.d_address, static_cast<size_t>(handle_description.aligned_size));
+ const size_t map_size = handle_description.aligned_size;
+ if (!handle_description.in_heap) {
+ auto& smmu = host1x.MemoryManager();
+ smmu.Unmap(handle_description.d_address, map_size);
+ smmu.Free(handle_description.d_address, static_cast<size_t>(map_size));
+ handle_description.d_address = 0;
+ return;
+ }
+ const VAddr vaddress = handle_description.address;
+ auto* session = core.GetSession(handle_description.session_id);
+ session->mapper->Unmap(vaddress, map_size);
handle_description.d_address = 0;
+ handle_description.in_heap = false;
}
bool NvMap::TryRemoveHandle(const Handle& handle_description) {
@@ -188,24 +199,31 @@ DAddr NvMap::PinHandle(NvMap::Handle::Id handle, size_t session_id, bool low_are
DAddr address{};
auto& smmu = host1x.MemoryManager();
auto* session = core.GetSession(session_id);
- while ((address = smmu.Allocate(handle_description->aligned_size)) == 0) {
- // Free handles until the allocation succeeds
- std::scoped_lock queueLock(unmap_queue_lock);
- if (auto freeHandleDesc{unmap_queue.front()}) {
- // Handles in the unmap queue are guaranteed not to be pinned so don't bother
- // checking if they are before unmapping
- std::scoped_lock freeLock(freeHandleDesc->mutex);
- if (handle_description->d_address)
- UnmapHandle(*freeHandleDesc);
- } else {
- LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+ const VAddr vaddress = handle_description->address;
+ const size_t map_size = handle_description->aligned_size;
+ handle_description->session_id = session_id;
+ if (session->has_preallocated_area && session->mapper->IsInBounds(vaddress, map_size)) {
+ handle_description->d_address = session->mapper->Map(vaddress, map_size);
+ handle_description->in_heap = true;
+ } else {
+ while ((address = smmu.Allocate(map_size)) == 0) {
+ // Free handles until the allocation succeeds
+ std::scoped_lock queueLock(unmap_queue_lock);
+ if (auto freeHandleDesc{unmap_queue.front()}) {
+ // Handles in the unmap queue are guaranteed not to be pinned so don't bother
+ // checking if they are before unmapping
+ std::scoped_lock freeLock(freeHandleDesc->mutex);
+ if (handle_description->d_address)
+ UnmapHandle(*freeHandleDesc);
+ } else {
+ LOG_CRITICAL(Service_NVDRV, "Ran out of SMMU address space!");
+ }
}
- }
- handle_description->d_address = address;
-
- smmu.Map(address, handle_description->address, handle_description->aligned_size,
- session->smmu_id);
+ handle_description->d_address = address;
+ smmu.Map(address, vaddress, map_size, session->smmu_id);
+ handle_description->in_heap = false;
+ }
}
if (low_area_pin) {
diff --git a/src/core/hle/service/nvdrv/core/nvmap.h b/src/core/hle/service/nvdrv/core/nvmap.h
index 7dd6d26c3..4af61289e 100644
--- a/src/core/hle/service/nvdrv/core/nvmap.h
+++ b/src/core/hle/service/nvdrv/core/nvmap.h
@@ -70,6 +70,8 @@ public:
u8 kind{}; //!< Used for memory compression
bool allocated{}; //!< If the handle has been allocated with `Alloc`
+ bool in_heap{};
+ size_t session_id{};
DAddr d_address{}; //!< The memory location in the device's AS that this handle corresponds to,
//!< this can also be in the nvdrv tmem
diff --git a/src/video_core/gpu.cpp b/src/video_core/gpu.cpp
index 6ad3b94f8..609704b33 100644
--- a/src/video_core/gpu.cpp
+++ b/src/video_core/gpu.cpp
@@ -34,8 +34,6 @@
#include "video_core/renderer_base.h"
#include "video_core/shader_notify.h"
-#pragma optimize("", off)
-
namespace Tegra {
struct GPU::Impl {